We start with observations of 793 synapses with 12 measurments formatted as a 793x12 matrix. The matrix is Z-scored column-wise and plotted in a pairs plot colored according to the labels given by the field expert.
dat <- read.csv("rorb_gaussianAvg_at.csv")
loc <- read.csv("rorb_gaussianAvg_at_orderLocations.csv")
gabaID <- read.csv("rorb_gaba.csv")
truth <- gaba <- gabaID$gaba
ccol <- c('blue', 'blue', 'blue', 'red', 'red', 'red', 'green', 'black', 'green', 'green', 'black', 'green')
ind <- order(ccol)
ccol <- sort(ccol)
dat <- dat[,ind]
sdat <- as.data.frame(scale(dat, center = TRUE, scale = TRUE))
pairs(sdat, col = gaba + 1, pch = 20, cex = 0.2,
main = "Pairs plot colored by true class")repHeat(sdat); title("Representative heatmap")We run the Z-scored data through PCA and plot the scree-plot with the elbows as given by Zhu and Ghodsi. A pairs plot is shown on the PCA data again colored by class labels.
pc1 <- princomp(sdat)
getElbows(pc1$sdev)pairs(pc1$scores, col = gaba +1, pch = 20, cex = 0.5)## [1] 5 7 10
Next, applying LDA and QDA iterating on the number of PCA dimension included.
ldaL <-
lapply(1:12,
function(x){
ldatmp <- lda(gaba ~ pc1$scores[, 1:x], CV = FALSE)
predict(ldatmp)
})
qdaL <-
lapply(1:12,
function(x){
qdatmp <- qda(gaba ~ pc1$scores[, 1:x], CV = FALSE)
predict(qdatmp)
})
ll <- sapply(lapply(ldaL, '[[', 1), function(x) sum(x != gaba)/length(gaba))
ql <- sapply(lapply(qdaL, '[[', 1), function(x) sum(x != gaba)/length(gaba))
#png(file = "~/Desktop/supRes.png", 480,480)
plot(x = 1:12, y = seq(0,0.13,length = 12), type = 'n', xlab = expression(hat(d)), ylab = "L")
points(ll, type = 'b', col = 'blue')
points(ql, type = 'b', col = 'darkred')
text(10,max(ql), label = "qda", col = 'darkred')
text(10, min(ll), label = "lda", col = 'blue', pos = 1)#dev.off()
qmm <- sapply(1:12, function(x) sum((gaba - qdaL[[x]]$post[, 1])^2))
lmm <- sapply(1:12, function(x) sum((gaba - ldaL[[x]]$post[, 1])^2))Data in \(M_{793 \times 12}\) is transformed using PCA with center=TRUE and scale=TRUE to \(D_{793 \times 12}\). Class labels, \(Y_{i\in [793]} \in {0,1}\) have been given by the field experts (quite possibly errorful) with proportions 708 and 85 out of 793 of non-gaba and gaba, respectively.
Let \(n_0, n_1\) be the numbers of points in class 0 and 1, respectively.
n0 <- 708
n1 <- 85
params <- list()
for(i in 1:12){
params[[i]] <- list()
for(j in unique(gaba)){
params[[i]][[as.character(j)]] <-
list(
means = colMeans(as.matrix(pc1$scores[gaba == j, 1:i])),
sigma = if(i == 1){
sd(pc1$scores[gaba == j, 1:i])
} else {
cov(pc1$scores[gaba == j, 1:i])
}
)
}}mont <- 500
truth <- c(rep(0, n0), rep(1,n1))
s <- list()
for(mi in 1:mont){
set.seed(mi)
s[[mi]] <- rbind(
rmvnorm(n0, mean = params[[12]]$'0'$means, sigma = params[[12]]$'0'$sigma),
rmvnorm(n1, mean = params[[12]]$'1'$means, sigma = params[[12]]$'1'$sigma)
)
}
d1 <- foreach(j = 1:12) %:%
foreach(x = 1:mont, .combine = 'rbind') %do% {
qdaR <- qda(truth ~ s[[x]][, 1:j], CV = FALSE)
rcl <- as.numeric(predict(qdaR)$class) - 1
qdaD <- qda(truth ~ s[[x]][, 1:j], CV = TRUE)
(Lr <- sum(rcl != truth)/length(truth))
(Ld <- sum((as.numeric(qdaD$class) - 1) != truth)/length(truth))
#assign(paste0("Lr", j), Lr)
#assign(paste0("Ld", j), Ld)
#cbind(
# eval(as.symbol(paste0("Lr", j))),
# eval(as.symbol(paste0("Ld", j)))
# )
cbind(Lr, Ld)
}
tmp <- cbind(Reduce(rbind, d1))
tmp1 <- as.data.frame(cbind(data.table::melt(tmp)))
tmp1$Var1 <- as.factor(rep(1:12, each = mont))
tmp1$Var2 <- as.factor(tmp1$Var2)
names(tmp1) <- c("dim", "L", "value")
p <- ggplot(data = tmp1, aes(x = L, y = value, fill = dim)) +
geom_boxplot(notch = TRUE) + facet_grid(. ~ dim)plot(p)dLines <-
foreach(mi = 1:mont) %:%
foreach(x = 1:12, .combine = 'rbind') %dopar% {
qdaR <- qda(truth ~ s[[mi]][, 1:x], CV = FALSE)
rcl <- as.numeric(predict(qdaR)$class) - 1
qdaD <- qda(truth ~ as.matrix(s[[x]][, 1:j]), CV = TRUE)
(Lr <- sum(rcl != truth)/length(truth))
(Ld <- sum((as.numeric(qdaD$class) - 1) != truth)/length(truth))
cbind(Lr, Ld, mi, x)
}
lineDat <- Reduce('rbind', dLines)
pLines <- ggplot(as.data.frame(lineDat), aes(x = x, group = mi)) + geom_line(aes(y = Lr, color = mi)) show(pLines)show(p1sig)## notch went outside hinges. Try setting notch=FALSE.